## The basic files and libraries needed for most presentations
# creates the libraries and common-functions sections
# read_chunk() comes from knitr, which the presentation renderer loads
read_chunk("../common/utility_functions.R")
# Use library() rather than require(): require() only returns FALSE when a
# package is missing, silently deferring the failure to the first call site.
library(ggplot2) # for plots
library(lattice) # nicer scatter plots
library(plyr) # for processing data.frames
library(grid) # contains the arrow function
library(biOps) # for basic image processing
library(doMC) # for parallel code
library(png) # for reading png images
library(gridExtra)
library(reshape2) # for the melt function
## To install EBImage
# source("http://bioconductor.org/biocLite.R")
# biocLite("EBImage")
library(EBImage) # for more image processing
# list consumed elsewhere (e.g. for citation / environment reporting)
used.libraries<-c("ggplot2","lattice","plyr","reshape2","grid","gridExtra","biOps","png","EBImage")
# start parallel environment (registers all available cores for %dopar%)
registerDoMC()
# functions for converting images back and forth
im.to.df<-function(in.img) {
  # Convert a matrix image into a long data.frame with one row per pixel.
  #
  # in.img : a matrix (rows map to x, columns to y)
  # returns: data.frame with columns x, y, val; val is filled in
  #          column-major order, matching expand.grid's row ordering
  # seq_len() instead of 1:nrow so a degenerate 0-row/0-col matrix
  # yields an empty frame rather than the 1:0 == c(1, 0) surprise
  out.im <- expand.grid(x = seq_len(nrow(in.img)), y = seq_len(ncol(in.img)))
  out.im$val <- as.vector(in.img)
  out.im
}
df.to.im<-function(in.df,val.col="val",inv=FALSE) {
  # Inverse of im.to.df: rebuild a matrix image from a long data.frame.
  #
  # in.df   : data.frame with an x column and a value column
  # val.col : name of the value column (default "val")
  # inv     : if TRUE, invert intensities as 255 - value
  # returns : matrix tagged with attr "type" = "grey"
  in.vals <- in.df[[val.col]]
  # logical masks become 0/255 grey levels; is.logical() checks the whole
  # vector instead of the original class(in.vals[1]) string comparison
  if (is.logical(in.vals)) in.vals <- as.integer(in.vals) * 255L
  if (inv) in.vals <- 255 - in.vals
  # number of distinct x positions gives the row count; values are assumed
  # to be in the same column-major order im.to.df produced
  out.mat <- matrix(in.vals, nrow = length(unique(in.df$x)), byrow = FALSE)
  attr(out.mat, "type") <- "grey"
  out.mat
}
ddply.cutcols<-function(...,cols=1) {
  # Run plyr::ddply and then clean up the first `cols` grouping columns
  # produced by cut()-style expressions: each "(a,b]" interval label is
  # replaced by its numeric midpoint and each column name such as
  # "cut_interval(utility, 7)" is stripped back to "utility".
  #
  # ...  : forwarded unchanged to ddply
  # cols : number of leading columns holding interval labels
  cur.table <- ddply(...)
  # "(a,b]" -> mean(a, b): drop the brackets, split on the comma
  cutlabel.fixer <- function(oVal) {
    sapply(oVal, function(x) {
      cnv <- as.character(x)
      mean(as.numeric(strsplit(substr(cnv, 2, nchar(cnv) - 1), ",")[[1]]))
    })
  }
  # "cut_interval(utility, 7)" -> "utility": keep the text between the
  # first "(" and the last ","
  cutname.fixer <- function(c.str) {
    s.str <- strsplit(c.str, "(", fixed = TRUE)[[1]]
    t.str <- strsplit(paste(s.str[c(2:length(s.str))], collapse = "("), ",")[[1]]
    # the original wrote t.str[c(1:length(t.str)-1)], which only works
    # because R silently drops the 0 index; seq_len states the intent
    paste(t.str[seq_len(length(t.str) - 1)], collapse = ",")
  }
  # seq_len(cols) is a no-op for cols = 0 (1:cols would loop over c(1, 0))
  for (i in seq_len(cols)) {
    cur.table[, i] <- cutlabel.fixer(cur.table[, i])
    names(cur.table)[i] <- cutname.fixer(names(cur.table)[i])
  }
  cur.table
}
# Render a list of png files as a grid of labelled ggplot panels.
# file.list : paths to png files
# title.fun : function(path) -> title string for that panel
# zoom      : size of the raster relative to its panel (npc units)
show.pngs.as.grid<-function(file.list,title.fun,zoom=1) {
# read one png and wrap it as a raster grob scaled by `zoom`
preparePng<-function(x) rasterGrob(readPNG(x,native=T,info=T),width=unit(zoom,"npc"),interp=F)
# blank square plot used purely as a canvas for the png + its title
labelPng<-function(x,title="junk") (qplot(1:300, 1:300, geom="blank",xlab=NULL,ylab=NULL,asp=1)+
annotation_custom(preparePng(x))+
labs(title=title)+theme_bw(24)+
theme(axis.text.x = element_blank(),
axis.text.y = element_blank()))
# one labelled panel per file, arranged together by grid.arrange
imgList<-llply(file.list,function(x) labelPng(x,title.fun(x)) )
do.call(grid.arrange,imgList)
}
## Standard image processing tools which I use for visualizing the examples in the script
# Per-label centroids: for each distinct val (object label), compute the
# unweighted mean position (xv, yv) and the intensity-weighted centre of
# mass (xm, ym) using the weight column of in.df.
commean.fun<-function(in.df) {
ddply(in.df,.(val), function(c.cell) {
weight.sum<-sum(c.cell$weight)
data.frame(xv=mean(c.cell$x),
yv=mean(c.cell$y),
xm=with(c.cell,sum(x*weight)/weight.sum),
ym=with(c.cell,sum(y*weight)/weight.sum)
)
})
}
# Column means of x returned as a one-row data.frame (extra arguments,
# e.g. na.rm, are passed straight through to colMeans)
colMeans.df <- function(x, ...) {
  mean.vec <- colMeans(x, ...)
  as.data.frame(t(mean.vec))
}
# Principal-component analysis of each labelled object's pixel positions:
# for every val, the 2x2 covariance of (x, y) is eigen-decomposed.  Each
# output row carries the object's mean position plus one eigenvector
# (vx, vy), its length scale vw = sqrt(eigenvalue) (a standard deviation
# along that axis) and its orientation th.off; eigen() returns two
# vectors, so there are two rows per label.
pca.fun<-function(in.df) {
ddply(in.df,.(val), function(c.cell) {
c.cell.cov<-cov(c.cell[,c("x","y")])
c.cell.eigen<-eigen(c.cell.cov)
c.cell.mean<-colMeans.df(c.cell[,c("x","y")])
out.df<-cbind(c.cell.mean,
data.frame(vx=c.cell.eigen$vectors[1,],
vy=c.cell.eigen$vectors[2,],
vw=sqrt(c.cell.eigen$values),
th.off=atan2(c.cell.eigen$vectors[2,],c.cell.eigen$vectors[1,]))
)
})
}
# Turn the two principal-component rows per label (from pca.fun) into an
# ellipse outline.  Row 1 supplies the centre and major-axis direction,
# row 2 the second axis length.  The sqrt(5) scaling inflates the
# standard deviations into visible axis lengths -- presumably a coverage
# factor; confirm against the original derivation.
vec.to.ellipse<-function(pca.df) {
ddply(pca.df,.(val),function(cur.pca) {
# assume there are two vectors now
create.ellipse.points(x.off=cur.pca[1,"x"],y.off=cur.pca[1,"y"],
b=sqrt(5)*cur.pca[1,"vw"],a=sqrt(5)*cur.pca[2,"vw"],
th.off=pi/2-atan2(cur.pca[1,"vy"],cur.pca[1,"vx"]),
x.cent=cur.pca[1,"x"],y.cent=cur.pca[1,"y"])
})
}
# test function for ellipse generation
# ggplot(ldply(seq(-pi,pi,length.out=100),function(th) create.ellipse.points(a=1,b=2,th.off=th,th.val=th)),aes(x=x,y=y))+geom_path()+facet_wrap(~th.val)+coord_equal()
# Sample `pts` points along an ellipse with semi-axes a and b, rotated by
# th.off and centred at (x.off, y.off).  th.max < 2*pi gives an arc; any
# extra named arguments in ... become additional data.frame columns.
create.ellipse.points<-function(x.off=0,y.off=0,a=1,b=NULL,th.off=0,th.max=2*pi,pts=36,...) {
  if (is.null(b)) {
    b <- a # default to a circle
  }
  angles <- seq(0, th.max, length.out = pts)
  cos.o <- cos(th.off)
  sin.o <- sin(th.off)
  data.frame(
    x = a * cos.o * cos(angles) + b * sin.o * sin(angles) + x.off,
    y = b * cos.o * sin(angles) - a * sin.o * cos(angles) + y.off,
    # id uniquely tags the ellipse so several can share one data.frame
    id = as.factor(paste(x.off, y.off, a, b, th.off, pts, sep = ":")),
    ...
  )
}
# Draw the ellipse described by the first row of a deformation-box table
# (columns x, y, a, b, th, col), delegating to create.ellipse.points.
deform.ellipse.draw<-function(c.box) {
  first <- function(col.name) c.box[[col.name]][1]
  create.ellipse.points(x.off = first("x"),
                        y.off = first("y"),
                        a = first("a"),
                        b = first("b"),
                        th.off = first("th"),
                        col = first("col"))
}
# Axis-aligned bounding box of each labelled object: returns, per val,
# the mean position plus a closed 5-point rectangle outline (xi, yi)
# suitable for geom_path, and the box widths xw, yw.  emin/emax pad by
# half a pixel so the box encloses whole pixels.
bbox.fun<-function(in.df) {
ddply(in.df,.(val), function(c.cell) {
c.cell.mean<-colMeans.df(c.cell[,c("x","y")])
xmn<-emin(c.cell$x)
xmx<-emax(c.cell$x)
ymn<-emin(c.cell$y)
ymx<-emax(c.cell$y)
out.df<-cbind(c.cell.mean,
data.frame(xi=c(xmn,xmn,xmx,xmx,xmn),
yi=c(ymn,ymx,ymx,ymn,ymn),
xw=xmx-xmn,
yw=ymx-ymn
))
})
}
# since the edge of the pixel is 0.5 away from the middle of the pixel
# pixel edge: half a pixel below the smallest pixel centre
emin <- function(...) {
  min(...) - 0.5
}
# pixel edge: half a pixel above the largest pixel centre
emax <- function(...) {
  max(...) + 0.5
}
# Object extents for plotting: per label, two rows describing a cross
# through the centroid -- row 1 is the vertical segment (x fixed at the
# mean, y spanning emin..emax), row 2 the horizontal segment (y fixed at
# the mean, x spanning emin..emax).
extents.fun<-function(in.df) {
ddply(in.df,.(val), function(c.cell) {
c.cell.mean<-colMeans.df(c.cell[,c("x","y")])
out.df<-cbind(c.cell.mean,data.frame(xmin=c(c.cell.mean$x,emin(c.cell$x)),
xmax=c(c.cell.mean$x,emax(c.cell$x)),
ymin=c(emin(c.cell$y),c.cell.mean$y),
ymax=c(emax(c.cell$y),c.cell.mean$y)))
})
}
# location of the assets shared between presentations
common.image.path <- "../common"
# full path to a figure under common/figures
qbi.file <- function(file.name) {
  file.path(common.image.path, "figures", file.name)
}
# full path to a data file under common/data
qbi.data <- function(file.name) {
  file.path(common.image.path, "data", file.name)
}
# shared fill scale: 10-colour rainbow palette clamped to [0, max.val]
th_fillmap.fn<-function(max.val) scale_fill_gradientn(colours=rainbow(10),limits=c(0,max.val))
author: Kevin Mader, Christian Dietz date: 19 February 2015 width: 1440 height: 900 css: ../common/template.css transition: rotate
ETHZ: 227-0966-00L
Experimental Design finding the right technique, picking the right dyes and samples has stayed relatively consistent, better techniques lead to more demanding scientists.
Management storing, backing up, setting up databases, these processes have become easier and more automated as data magnitudes have increased
Measurements the actual acquisition speed of the data has increased wildly due to better detectors, parallel measurement, and new higher intensity sources
Post Processing this portion is the most time-consuming and difficult and has seen minimal improvements over the last years
library("ggthemes")
# guesstimates
# time spent per category per experiment in each era (hours, judging from
# the 365*24 conversion used later -- confirm units)
time.data<-data.frame(
year=c(2000,2008,2014,2020),
"Experiment Design"=c(10,10,8,8),
"Measurements"=c(50,5,0.5,0.1),
"Management"=c(20,15,10,8),
"Post Processing"=c(50,50,50,50)
)
# melt to long form and normalize each year's times to percentages
mtd<-ddply(melt(time.data,id.vars="year"),.(year),
function(x) cbind(x,sum.val=sum(x$value),norm.val=100*x$value/sum(x$value))
)
# data.frame() replaced the spaces in the column names with dots;
# restore them for the legend labels
mtd$variable<-gsub("[.]"," ",mtd$variable)
# stacked bars: relative share of experiment time by activity and year
ggplot(mtd,aes(x=as.factor(year),y=norm.val,fill=variable))+
geom_bar(stat="identity")+
labs(x="Year",y="Relative Time",fill="",title="Experimental Time Breakdown")+
theme_economist(20)
# yearly throughput if work ran back to back: 365*24 hours divided by the
# hours needed per measurement (design + acquisition) or per publication
# (all four stages)
output.df<-data.frame(Year=time.data$year,
Measurements=
365*24/(time.data$Experiment.Design+time.data$Measurements),
Publications=365*24/rowSums(time.data[,c(2:5)])
)
mtd2<-melt(output.df,id.vars="Year")
# dodged bars; square-root y scale keeps both series readable
ggplot(mtd2,aes(x=as.factor(Year),y=value,fill=variable))+
geom_bar(stat="identity",position="dodge")+
scale_y_sqrt()+
labs(x="Year",y="Output",fill="")+
theme_wsj(25)
kable(output.df,digits=0)
| Year | Measurements | Publications |
|---|---|---|
| 2000 | 146 | 67 |
| 2008 | 584 | 110 |
| 2014 | 1031 | 128 |
| 2020 | 1081 | 133 |
To put more real numbers on these scales rather than 'pseudo-publications', the time to measure a terabyte of data is shown in minutes.
# minutes needed to acquire 1 TB of data in each era
# (the denominators look like acquisition rates in GB/min -- confirm)
mmtb<-data.frame( Year=c(2000,2008,2014,2016),
                  y=1024/c(1/4,60/64,32,8*60) )
names(mmtb)[2]<-"Time to 1 TB in Minutes"
kable(mmtb,digits=0)
| Year | Time to 1 TB in Minutes |
|---|---|
| 2000 | 4096 |
| 2008 | 1092 |
| 2014 | 32 |
| 2016 | 2 |
If you looked at one 1000 x 1000 sized image
# a random 1000 x 1000 image stands in for one frame of real data
image(255*matrix(runif(1000*1000),nrow=1000))
# assuming 16 bit images and a 'metric' terabyte
# 1e12 bytes / 2 MB per image = 500k images; at one image per second that
# works out to the number of hours printed below (~139 h)
time.per.tb<-1e12/(1000*1000*16/8) / (60*60)
cat("__",
round(time.per.tb),
"__",sep="")
139 hours to browse through a terabyte of data.
# extend the acquisition table with the manpower needed to browse the data
# as fast as it is produced, and the resulting monthly salary bill
names(mmtb)[2]<-"Time to 1 TB"
mmtb$"Man power to keep up"<-time.per.tb*60/mmtb$"Time to 1 TB"
mmtb[,2]<-paste(round(mmtb[,2]),"min") # add minutes suffix
mmtb$"Salary Costs / Month"<-mmtb$"Man power to keep up"*12500 # 12.5 kCHF/person
mmtb$"Man power to keep up"<-paste(round(mmtb$"Man power to keep up")," people")
mmtb$"Salary Costs / Month"<-paste(round(mmtb$"Salary Costs / Month"/1000)," kCHF")
kable(mmtb,digits=0)
| Year | Time to 1 TB | Man power to keep up | Salary Costs / Month |
|---|---|---|---|
| 2000 | 4096 min | 2 people | 25 kCHF |
| 2008 | 1092 min | 8 people | 95 kCHF |
| 2014 | 32 min | 260 people | 3255 kCHF |
| 2016 | 2 min | 3906 people | 48828 kCHF |
\[ \textrm{Transistors} \propto 2^{T/(\textrm{18 months})} \]
# stolen from https://gist.github.com/humberto-ortiz/de4b3a621602b78bf90d
# whitespace-separated table of representative CPUs: transistor count in
# thousands and clock speed in MHz
moores.txt<-c("Id Name Year Count(1000s) Clock(MHz)\n",
"0 MOS65XX 1975 3.51 14\n",
"1 Intel8086 1978 29.00 10\n",
"2 MIPSR3000 1988 120.00 33\n",
"3 AMDAm486 1993 1200.00 40\n",
"4 NexGenNx586 1994 3500.00 111\n",
"5 AMDAthlon 1999 37000.00 1400\n",
"6 IntelPentiumIII 1999 44000.00 1400\n",
"7 PowerPC970 2002 58000.00 2500\n",
"8 AMDAthlon64 2003 243000.00 2800\n",
"9 IntelCore2Duo 2006 410000.00 3330\n",
"10 AMDPhenom 2007 450000.00 2600\n",
"11 IntelCorei7 2008 1170000.00 3460\n",
"12 IntelCorei5 2009 995000.00 3600")
# convert to csv: join the lines with a "zZ" sentinel, collapse all
# whitespace runs (including the embedded newlines) into commas, then
# turn the sentinels back into real newlines for read.csv
moores.pt.text<-gsub("zZ","\n",gsub("\\s+",",",paste(moores.txt,collapse="zZ")))
moores.df<-read.csv(text=moores.pt.text)
names(moores.df)[c(4,5)]<-c("kTransistors","Speed")
# keep Name/Year/kTransistors/Speed, dropping the Id column (and any
# empty trailing column the comma conversion introduced)
moores.df<-moores.df[,c(2:5)]
library(scales)
# log10 y-axis with 10^x-style labels; steps controls the density of the
# minor breaks.
# NOTE(review): min/max feed trans_breaks via c(min:max), which only
# makes sense for integer-ish limits -- confirm intended usage
kscale_y_log10<-function(min=10,max=1e7,steps=6)
scale_y_log10(
breaks = trans_breaks('log10', function(x) 10^x,5)(c(min:max)),
minor_breaks = trans_breaks('log10',function(x) 10^x,steps),
labels = trans_format('log10', math_format(10^.x)))
# long form: one row per (Year, Name, metric)
moores.table<-melt(moores.df,
id.vars=c("Year","Name"))
# Moore's-law prediction anchored at the first chip: transistor count
# doubling every 1.5 years (18 months)
moores.law<-function(year) moores.df[1,"kTransistors"]*2^((year-moores.df[1,"Year"])/1.5)
moores.table$variable<-gsub("Speed","Speed (MHz)",moores.table$variable)
ggplot(moores.table,aes(Year,value,color=variable))+
# overlay the ideal doubling curve in black
geom_line(aes(linetype="Moore's Law",y=moores.law(moores.table$Year)),color="black",
size=2,alpha=0.8)+
geom_point()+
geom_smooth()+
# shaded band marking the era where clock speeds stopped climbing
geom_bar(data=data.frame(x=2005,y=10^4),
aes(x=x,y=y,fill="MHz Saturation"),width=7,stat="identity",
alpha=0.25,color="black")+
#scale_y_log10()+
kscale_y_log10()+
xlim(1975,2012)+
labs(color="",fill="",y="",linetype="")+
theme_economist(20)
There are now many more transistors inside a single computer but the processing speed hasn't increased. How can this be?
# Build a cloud-cost function: hours are billed at the peak rate for the
# peakRatio fraction of the time and at the spot rate for the remainder.
cloudCostGen <- function(cloudPeakCost, cloudSpotCost) {
  function(hours, peakRatio = 1) {
    peak.part <- hours * peakRatio * cloudPeakCost
    spot.part <- hours * (1 - peakRatio) * cloudSpotCost
    peak.part + spot.part
  }
}
# Build a workstation-cost function: fixed purchase price plus energy use.
# NOTE(review): the /1e6 implies energyPriceStd is per MWhr, while the
# inline scenario code later divides by 1000 -- confirm the units.
localCostGen <- function(compCost, energyPriceStd, compPowerRaw) {
  energyPrice <- energyPriceStd / 1e6 # $ / Whr
  function(hours) {
    compCost + energyPrice * compPowerRaw * hours
  }
}
# Build a cost-vs-usage table for each candidate lifetime (years): 50
# usage levels from idle to 24/7, costed for worst/best/middle-case
# cloud pricing and for the local workstation.
make.res.table<-function(cloudCost,localCost, years) {
ldply(years,function(life) {
timef<-seq(0,life*24*365,length.out=50)
data.frame(life=life*12,hours=timef, # lifetime in months; total hours used
hoursPerWeek=timef/(life*52),
WorstCaseCloud=cloudCost(timef,1), # everything at the peak rate
BestCaseCloud=cloudCost(timef,0), # everything at the spot rate
MiddleCaseCloud=cloudCost(timef,0.5),
LocalWorkstation=localCost(timef))
})
}
# Monte-Carlo sample of n.guess usage scenarios, each costed with the
# supplied cloud and workstation cost functions.
random.sim <- function(cloudCost, localCost, n.guess) {
  # draw order matters for reproducibility under set.seed:
  # utility, then service years, then peak fraction
  utility <- runif(n.guess, min = 0, max = 1)
  serviceYears <- runif(n.guess, min = 1.5, max = 6.5)
  peak <- runif(n.guess, min = 0, max = 1)
  hours <- serviceYears * utility * 24 * 365
  scenarios <- data.frame(
    hours = hours,
    years = serviceYears,
    months = serviceYears * 12,
    ryears = round(serviceYears),
    utility = utility,
    peak = peak,
    clcost = cloudCost(hours, peak),
    wscost = localCost(hours)
  )
  # cloud price expressed as a percentage of the workstation price
  scenarios$cloudPremium <- with(scenarios, clcost / wscost * 100)
  scenarios
}
# Mean cloud and workstation cost over one bin of scenario points
scen.fcn <- function(c.pts) {
  data.frame(clcost = mean(c.pts$clcost),
             wscost = mean(c.pts$wscost))
}
# Bin the raw scenarios into a 7 x 7 x 4 (utility x peak x months) grid
# and average the costs in each cell; ddply.cutcols converts the three
# cut() interval labels back into numeric bin midpoints.
scen.grid<-function(scen.table.raw) {
ot<-ddply.cutcols(scen.table.raw,.(cut_interval(utility,7),
cut_interval(peak,7),
cut_interval(months,4)),cols=3,
scen.fcn
)
# cloud cost as a percentage of the workstation cost
ot$cloudPremium<-with(ot,clcost/wscost*100)
ot
}
# Break-even search: bin finely in utility (25 bins) and, for each
# (peak, months) cell, keep the highest-utility bin where the cloud is
# still no more expensive than the workstation.
scen.table<-function(scen.table.raw) {
ot<-ddply.cutcols(scen.table.raw,.(cut_interval(utility,25),
cut_interval(peak,5),
cut_interval(months,5)),cols=3,
scen.fcn
)
ot$cloudPremium<-with(ot,clcost/wscost*100)
ddply(ot,.(peak,months),function(c.pts) {
n.pts<-subset(c.pts,clcost<=wscost)
subset(n.pts,utility==max(n.pts$utility))
})
}
# high-end workstation price
# http://www.bestbuy.com/site/cybertronpc-desktop-intel-core-i7-64gb-memory-2tb-hdd-120gb-solid-state-drive-120gb-solid-state-drive-black-blue/6357048.p?id=1219209052767&skuId=6357048
compCost<-3999.99
compPowerRaw<-190 #Watts http://www.eu-energystar.org/en/en_008.shtml#use
energyPriceStd<-0.143 # Eu/KWHr http://www.eu-energystar.org/en/en_008.shtml#use
energyPrice<-energyPriceStd/1000*1.2 # $ / Whr (1.2 looks like a EUR->USD factor -- confirm)
cloudPeakCost<-0.780 # $/hr at peak / on-demand rate
cloudSpotCost<-0.097 # $/hr at spot rate
# blended cloud cost for the given hours, peakRatio of them at peak rate
cloudCost<-function(hours,peakRatio=1) hours*peakRatio*cloudPeakCost+hours*(1-peakRatio)*cloudSpotCost
# workstation: fixed purchase cost plus electricity for the hours used
localCost<-function(hours) compCost+energyPrice*compPowerRaw*hours
http://www-inst.eecs.berkeley.edu/~cs61c/sp14/ “The Case for Energy-Proportional Computing,” Luiz André Barroso, Urs Hölzle, IEEE Computer, December 2007
The figure shows the range of cloud costs (determined by peak usage) compared to a local workstation with utilization shown as the average number of hours the computer is used each week.
# cost-vs-usage table for a single 3-year lifetime (same shape as
# make.res.table produces)
res.table<-
ldply(c(3),function(life) {
timef<-seq(0,life*24*365,length.out=50)
data.frame(life=life*12,hours=timef,
hoursPerWeek=timef/(life*52),
WorstCaseCloud=cloudCost(timef,1),
BestCaseCloud=cloudCost(timef,0),
MiddleCaseCloud=cloudCost(timef,0.5),
LocalWorkstation=localCost(timef))
})
# the workstation cost is a line; the cloud cost is a band between the
# best (all spot) and worst (all peak) pricing cases
ggplot(res.table,
aes(x=hoursPerWeek))+
geom_line(aes(y=LocalWorkstation,color="Local Workstation"),size=2)+
geom_errorbar(aes(ymin=WorstCaseCloud,ymax=BestCaseCloud,color="Cloud"))+
labs(x="Average Computer Usage (hr/wk)",y="Cost ($)",color="Solution")+
facet_wrap(~life)+
theme_bw(25)
The figure shows the cost of a cloud based solution as a percentage of the cost of buying a single machine. Cells where the cloud costs less than 100% of the workstation show that percentage as a number. The panels distinguish the average time to replacement for the machines in months
# Monte-Carlo sweep: one million random (utility, lifetime, peak) scenarios
n.guess<-1e6
utility<-runif(n.guess,min=0,max=1)
serviceYears<-runif(n.guess,min=1.5,max=6.5)
peak<-runif(n.guess,min=0,max=1)
hours<-serviceYears*utility*24*365
scen.table.raw<-data.frame(hours=hours,
years=serviceYears,
months=serviceYears*12,
ryears=round(serviceYears),
utility=utility,
peak=peak,
clcost = cloudCost(hours,peak),
wscost = localCost(hours)
)
# mean costs within one bin of scenarios (redefined here for the chunk)
scen.fcn<-function(c.pts) {
data.frame(
clcost=mean(c.pts$clcost),
wscost=mean(c.pts$wscost)
)
}
# average the costs over a 7 x 7 x 4 utility/peak/months grid;
# ddply.cutcols turns the interval labels back into numeric midpoints
scen.table<-ddply.cutcols(scen.table.raw,.(cut_interval(utility,7),
cut_interval(peak,7),
cut_interval(months,4)),cols=3,
scen.fcn
)
scen.table$cloudPremium<-with(scen.table,clcost/wscost*100)
# heat map of the cloud premium; only cells where the cloud is cheaper
# (premium < 100%) get a printed number
ggplot(scen.table,aes(y=utility*100,x=peak*100,fill=cloudPremium))+
geom_raster()+
geom_text(aes(label=ifelse(cloudPremium<100,round(cloudPremium),"")))+
labs(x="Peak Usage (%)",y="Utilization (%)",fill="Cloud Costs\n(% of Workstation)")+
scale_fill_gradientn(colours=rainbow(4))+
facet_wrap(~months)+
theme_bw(10)
Here the equal cost point is shown where the cloud and local workstations have the same cost. The x-axis is the percentage of resources used at peak-time and the y shows the expected usable lifetime of the computer. The color indicates the utilization percentage and the text on the squares shows this as the numbers of hours used in a week.
# finer 25-bin utility grid to locate the break-even utilization
scen.table<-ddply.cutcols(scen.table.raw,.(cut_interval(utility,25),
cut_interval(peak,5),
cut_interval(months,5)),cols=3,
scen.fcn
)
# per (peak, months) cell: the highest-utility bin where cloud <= local
scen.table.sub<-ddply(scen.table,.(peak,months),function(c.pts) {
n.pts<-subset(c.pts,clcost<=wscost)
subset(n.pts,utility==max(n.pts$utility))
})
# break-even map; the cell labels are hours per week (168 * utilization)
ggplot(scen.table.sub,aes(100*peak,months,fill=100*utility))+
geom_raster(alpha=0.65)+
geom_text(aes(label=round(utility*168)))+
labs(x="% Peak Time",y="Computer Lifetime\n(Months)",fill="Utilization (%)")+
scale_fill_gradientn(colours=rainbow(2))+
theme_bw(20)
source('../common/schedule.R')
ksable<-function(in.df,...) {
  # kable() with the row names reset to a plain 1..n sequence so that
  # filtered subsets do not display their original (gappy) row numbers;
  # seq_len() keeps this safe for a 0-row data.frame (1:0 would yield
  # c(1, 0) and fail the rownames assignment)
  rownames(in.df) <- seq_len(nrow(in.df))
  kable(in.df, ...)
}
# lectures 1-3 of the schedule loaded from schedule.R
kable(course.desc[c(1:3),])
| Lecture | Description | Applications |
|---|---|---|
| 19th February - Introduction and Workflows | Basic overview of the course, introduction to the basics of images and their acquisition, the importance of reproducibility and why workflows make sense for image processing | Calculating the intensity for a folder full of images |
| 26th February - Image Enhancement (A. Kaestner) | Overview of what techniques are available for assessing and improving the quality of images, specifically various filters, when to apply them, their side-effects, and how to apply them correctly | Removing detector noise from neutron images to distinguish different materials |
| 5th March - Basic Segmentation, Discrete Binary Structures | How to convert images into structures, starting with very basic techniques like threshold and exploring several automated techniques | Identify cells from noise, background, and dust |
# lectures 3-5 (segmentation block), with row names renumbered
ksable(course.desc[c(3:5),])
| Lecture | Description | Applications |
|---|---|---|
| 5th March - Basic Segmentation, Discrete Binary Structures | How to convert images into structures, starting with very basic techniques like threshold and exploring several automated techniques | Identify cells from noise, background, and dust |
| 12th March - Advanced Segmentation | More advanced techniques for extracting structures including basic clustering and classification approaches, and component labeling | Identifying fat and ice crystals in ice cream images |
| 19th March - Machine Learning in Image Processing (M. Jäggi and A. Lucchi) | Applying more advanced techniques from the field of Machine Learning to image processing segmentation and analysis like Support vector machines (SVM) and Markov Random Fields (MRF) | Training an algorithm to automatically identify cells |
# lectures 6-8 (object analysis block)
ksable(course.desc[c(6:8),])
| Lecture | Description | Applications |
|---|---|---|
| 26th March - Analyzing Single Objects | The analysis and characterization of single structures/objects after they have been segmented, including shape and orientation | Count cells and determine their average shape and volume |
| 2nd April - Analyzing Complex Objects | What techniques are available to analyze more complicated objects with poorly defined 'shape' using Distance maps, Thickness maps, and Voronoi tesselation | Seperate clumps of cells, analyze vessel networks, trabecular bone, and other similar structures |
| 16th April - Spatial Distribution | Extracting meaningful information for a collection of objects like their spatial distribution, alignment, connectivity, and relative positioning | Quantify cells as being evenly spaced or tightly clustered or organized in sheets |
# lectures 9-11 (statistics, dynamics, big data)
ksable(course.desc[c(9:11),])
| Lecture | Description | Applications |
|---|---|---|
| 23rd April - Statistics and Reproducibility | Making a statistical analysis from quantified image data, and establishing the precision of the metrics calculated, also more coverage of the steps to making an analysis reproducible | Determine if/how different a cancerous cell is from a healthly cell properly |
| 30th April - Dynamic Experiments | Performing tracking and registration in dynamic, changing systems covering object and image based methods | Turning a video of foam flow into metrics like speed, average deformation, and reorganization |
| 7th May - Scaling Up / Big Data | Performing large scale analyses on clusters and cloud-based machines and an introduction of how to work with 'big data' frameworks | Performing large scale analyses using ETHs clusters and Amazons Cloud Resources, how to do anything with a terabytes of data |
# final lectures: guest lecture and project presentations
ksable(course.desc[c(12:13),])
| Lecture | Description | Applications |
|---|---|---|
| 21th May - Guest Lecture, Applications in Material Science | Application of the course material to an actual scientific project from material science where we reproduce the results of a publication | |
| 28th May - Project Presentations | The presentations of the student projects done in the class |
A very abstract definition: A pairing between spatial information (position) and some other kind of information (value).
In most cases this is a 2 dimensional position (x,y coordinates) and a numeric value (intensity)
# a 5x5 random image shown as a table of (position, value) pairs
basic.image<-im.to.df(matrix(round(runif(5*5,0,100)),nrow=5))
names(basic.image)[3]<-"Intensity"
kable(head(basic.image))
| x | y | Intensity |
|---|---|---|
| 1 | 1 | 87 |
| 2 | 1 | 31 |
| 3 | 1 | 12 |
| 4 | 1 | 90 |
| 5 | 1 | 60 |
| 1 | 2 | 87 |
This can then be rearranged from a table form into an array form and displayed as we are used to seeing images
# draw each pixel as a tile labelled with its coordinates and intensity
ggplot(basic.image,aes(x,y))+
geom_tile(fill="yellow",color="black")+
geom_text(aes(label=paste("(",x,",",y,")\n",Intensity)))+
coord_equal()+
theme_bw(15)
The next step is to apply a color map (also called lookup table, LUT) to the image so it is a bit more exciting
# same tile plot, but the fill now encodes the intensity (a simple LUT);
# kept in a variable so later slides can swap in different fill scales
simple.image<-ggplot(basic.image,aes(x,y,fill=Intensity))+
geom_tile(color="black")+
geom_text(aes(label=paste("(",x,",",y,")\n",round(Intensity))))+
coord_equal()+
theme_bw(15)
simple.image
Which can be arbitrarily defined based on how we would like to visualize the information in the image
# the same image under an arbitrary rainbow LUT
simple.image+ scale_fill_gradientn(colours=rainbow(6))
# and under a log-scaled LUT, which spreads out the darker values
simple.image+
scale_fill_gradientn(colours=c("black","red","white"),trans="log")
Formally a lookup table is a function which \[ f(\textrm{Intensity}) \rightarrow \textrm{Color} \]
# visualize the LUT itself: bar colour as a function of intensity
ggplot(basic.image,aes(x=Intensity,y=Intensity))+
geom_bar(aes(fill=Intensity),stat="identity",position="dodge",color="black",width=2)+
scale_fill_gradientn(colours=rainbow(6))+
labs(y="Color")+
theme_bw(20)+
theme(axis.ticks = element_blank(),
axis.text.y = element_blank())
These transformations can also be non-linear as is the case of the graph below where the mapping between the intensity and the color is a \(\log\) relationship meaning the difference between the lower values is much clearer than the higher ones
# the same LUT visualization with a log mapping: differences between the
# lower intensities become much more visible
ggplot(basic.image,aes(x=Intensity,y=log(Intensity)))+
geom_bar(aes(fill=Intensity),stat="identity",position="dodge",color="black",width=2)+
scale_fill_gradientn(colours=c("black","red","white"),trans="log")+
labs(y="Color")+
theme_bw(20)+
theme(axis.ticks = element_blank(),
axis.text.y = element_blank())
On a real image the difference is even clearer
# t() so the png's row/column order matches the x/y convention used here
bone.img<-t(readPNG(qbi.file("tiny-bone.png")))
attr(bone.img,"type")<-"grey"
# blur and flip the image to stand in for a measured (degraded) version
meas.img<-im.to.df(flip(gblur(bone.img,3)))
# the same image under a linear and a log10 intensity transform
both.img<-rbind(
cbind(mutate(meas.img,val=0.9*val+0.4),src="Normal"),
cbind(mutate(meas.img,val=log10(40*val)),src="Log-transformed")
)
ggplot(both.img,aes(x,y,fill=val))+
geom_tile()+
coord_equal()+
facet_wrap(~src)+
scale_fill_gradientn(colours=c("black","red","white"))+
#guides(fill=F)+
labs(x="",y="")+
theme_bw(20)+
theme(axis.ticks = element_blank(),
axis.text.x = element_blank(),
axis.text.y = element_blank())
For a 3D image, the position or spatial component has a 3rd dimension (z if it is a spatial, or t if it is a movie)
# a 3x3x3 random volume: a 3D position (x, y, z) plus an intensity value
basic.image<-expand.grid(x=1:3,y=1:3,z=1:3)
basic.image$Intensity<-round(runif(nrow(basic.image),0,100))
kable(head(basic.image))
| x | y | z | Intensity |
|---|---|---|---|
| 1 | 1 | 1 | 25 |
| 2 | 1 | 1 | 87 |
| 3 | 1 | 1 | 39 |
| 1 | 2 | 1 | 60 |
| 2 | 2 | 1 | 43 |
| 3 | 2 | 1 | 72 |
This can then be rearranged from a table form into an array form and displayed as a series of slices
# show the volume slice by slice: one facet row per z value
ggplot(basic.image,aes(x,y))+
geom_tile(aes(fill=Intensity),color="black",alpha=0.8)+
geom_text(aes(label=paste("(",x,",",y,",",z,")\n",Intensity)))+
facet_grid(z~.)+
guides(fill=F)+
scale_fill_gradientn(colours=rainbow(4))+
theme_bw(15)
In the images thus far, we have had one value per position, but there is no reason there cannot be multiple values. In fact this is what color images are (red, green, and blue values), and some images even carry 4 channels with transparency (alpha) as a fourth value. For clarity we call the dimensionality of the image the number of dimensions in the spatial position, and the depth the number in the value.
# a 5x5 image with two values per pixel: an intensity and a transparency
basic.image<-expand.grid(x=1:5,y=1:5)
basic.image$Intensity<-round(runif(nrow(basic.image),0,100))
basic.image$Transparency<-round(runif(nrow(basic.image),0,100))
kable(head(basic.image))
| x | y | Intensity | Transparency |
|---|---|---|---|
| 1 | 1 | 53 | 3 |
| 2 | 1 | 95 | 94 |
| 3 | 1 | 64 | 18 |
| 4 | 1 | 43 | 44 |
| 5 | 1 | 14 | 35 |
| 1 | 2 | 37 | 73 |
This can then be rearranged from a table form into an array form and displayed as a single image with both channels shown together
# two values per pixel: fill encodes intensity, alpha encodes transparency
ggplot(basic.image,aes(x,y))+
geom_tile(aes(fill=Intensity,alpha=Transparency),color="black")+
geom_text(aes(
label=paste("(",x,",",y,")\n","I:",Intensity,"\nT:",Transparency)
))+
scale_fill_gradientn(colours=rainbow(4))+
theme_bw(15)
At each point in the image (black dot), instead of having just a single value, there is an entire spectrum. A selected group of these (red dots) are shown to illustrate the variations inside the sample. While certainly much more complicated, this still constitutes an image and requires the same sort of techniques to process correctly.
# hyperspectral demo data lives in the common data directory
hyper.path<-qbi.data
# read the white-field jpeg once and sum channels 3 and 1
# (the original re-read and re-decoded the same file twice)
raw.jpg<-readJpeg(hyper.path("raw.jpg"))
map.img<-t(raw.jpg[,,3]+raw.jpg[,,1])#,1,rev)
map.df<-im.to.df(map.img)
# measurement positions (x, y) for the individual spectra
impos<-read.table(hyper.path("impos.csv"),sep=",")
names(impos)<-c("x","y")
# white-field map with all measurement points overlaid; the red points
# are the subset near (252, 293) whose spectra are plotted next
ggplot(map.df,aes(x,y))+
geom_raster(aes(fill=val))+
#geom_tile(data=impos,fill="red",alpha=0.5)+
geom_point(data=impos,alpha=0.7)+
geom_point(data=subset(impos, abs(x-252)<20 & abs(y-293)<20),color="red")+
labs(fill="White Field\nIntensity")+
xlim(min(impos$x),max(impos$x))+
ylim(min(impos$y),max(impos$y))+
coord_equal()+
theme_bw(20)#,aes(fill="Scanned"))
# full spectra; keep only wavenumbers below 2500
full.df<-read.csv(hyper.path("full_img.csv"))
full.df<-subset(full.df,wavenum<2500)
# one spectrum panel per (x, y) position near (252, 293)
ggplot(
subset(full.df, abs(x-252)<20 & abs(y-293)<20),aes(wavenum,val)
)+
geom_line()+
facet_grid(x~y)+
labs(x=expression(paste("Wavenumber (",cm^-1,")",sep="")),y="Intensity (au)")+
theme_bw(10)
# modality overview: a tab-separated table with one modality per row
# (reconstructed here as explicit rows; the flattened original had lost
# its tab/newline structure)
in.table<-read.delim(text=paste(
  "Modality\tImpulse\tCharacteristic\tResponse\tDetection",
  "Light Microscopy\tWhite Light\tElectronic interactions\tAbsorption\tFilm, Camera",
  "Phase Contrast\tCoherent light\tElectron Density (Index of Refraction)\tPhase Shift\tPhase stepping, holography, Zernike",
  "Confocal Microscopy\tLaser Light\tElectronic Transition in Fluorescence Molecule\tAbsorption and reemission\tPinhole in focal plane, scanning detection",
  "X-Ray Radiography\tX-Ray light\tPhoto effect and Compton scattering\tAbsorption and scattering\tScintillator, microscope, camera",
  "Ultrasound\tHigh frequency sound waves\tMolecular mobility\tReflection and Scattering\tTransducer",
  "MRI\tRadio-frequency EM\tUnmatched Hydrogen spins\tAbsorption and reemission\tRF coils to detect",
  "Atomic Force Microscopy\tSharp Point\tSurface Contact\tContact, Repulsion\tDeflection of a tiny mirror",
  sep="\n"),header=TRUE)
kable(in.table,caption="Various modalities and their ways of being recorded")
| Modality | Impulse | Characteristic | Response | Detection |
|---|---|---|---|---|
| Light Microscopy | White Light | Electronic interactions | Absorption | Film, Camera |
| Phase Contrast | Coherent light | Electron Density (Index of Refraction) | Phase Shift | Phase stepping, holography, Zernike |
| Confocal Microscopy | Laser Light | Electronic Transition in Fluorescence Molecule | Absorption and reemission | Pinhole in focal plane, scanning detection |
| X-Ray Radiography | X-Ray light | Photo effect and Compton scattering | Absorption and scattering | Scintillator, microscope, camera |
| Ultrasound | High frequency sound waves | Molecular mobility | Reflection and Scattering | Transducer |
| MRI | Radio-frequency EM | Unmatched Hydrogen spins | Absorption and reemission | RF coils to detect |
| Atomic Force Microscopy | Sharp Point | Surface Contact | Contact, Repulsion | Deflection of a tiny mirror |
# t() so the png's row/column order matches the x/y convention used here
bone.img<-t(readPNG(qbi.file("tiny-bone.png")))
attr(bone.img,"type")<-"grey"
# blurred + flipped version stands in for the raw measurement
meas.img<-im.to.df(flip(gblur(bone.img,3)))
both.img<-rbind(
cbind(meas.img,src="Measured"),
cbind(im.to.df(bone.img),src="Reconstructed")
)
# measured vs reconstructed side by side with the shared rainbow fill map
ggplot(both.img,aes(x,y,fill=val))+
geom_tile()+
coord_equal()+
facet_wrap(~src)+
th_fillmap.fn(2)+
guides(fill=F)+
labs(x="",y="")+
theme_bw(20)+
theme(axis.ticks = element_blank(),
axis.text.x = element_blank(),
axis.text.y = element_blank())
##
fftshift2<-function (y)
{
  # Swap the quadrants of a 2D FFT result so the zero-frequency component
  # sits in the centre (the R analogue of MATLAB's fftshift).
  #
  # y: a matrix, typically the (complex) output of fft()
  # returns: y with rows and columns rotated by half their length
  nx <- nrow(y)
  nx2 <- floor(nx / 2)
  ny <- ncol(y)
  ny2 <- floor(ny / 2)
  # seq_len() keeps single-row/column matrices intact (1:0 would have
  # produced index c(1, 0) and duplicated the first row/column)
  y[c((nx2 + 1):nx, seq_len(nx2)), c((ny2 + 1):ny, seq_len(ny2))]
}
# log-magnitude of the centred 2D FFT stands in for the measured
# scattering pattern; /3.5 just rescales the fill range
scat.img<-im.to.df(abs(fftshift2(fft(bone.img))))
scat.img<-mutate(scat.img,val=log(val)/3.5)
both.img<-rbind(
cbind(scat.img,src="Measured"),
cbind(im.to.df(bone.img),src="Reconstructed")
)
ggplot(both.img,aes(x,y,fill=val))+
geom_tile()+
coord_equal()+
facet_wrap(~src)+
th_fillmap.fn(2)+
guides(fill=F)+
labs(x="",y="")+
theme_bw(20)+
theme(axis.ticks = element_blank(),
axis.text.x = element_blank(),
axis.text.y = element_blank())
Copyright 2003-2013 J. Konrad in EC520 lecture, reused with permission
\[ \left[\left([b(x,y)*s_{ab}(x,y)]\otimes h_{fs}(x,y)\right)*h_{op}(x,y)\right]*h_{det}(x,y)+d_{dark}(x,y) \]
\(s_{ab}\) is the only information you are really interested in, so it is important to remove or correct for the other components
For color (non-monochromatic) images the problem becomes even more complicated \[ \int_{0}^{\infty} {\left[\left([b(x,y,\lambda)*s_{ab}(x,y,\lambda)]\otimes h_{fs}(x,y,\lambda)\right)*h_{op}(x,y,\lambda)\right]*h_{det}(x,y,\lambda)}\mathrm{d}\lambda+d_{dark}(x,y) \]
Inspired by: imagej-pres
Proper processing and quantitative analysis is however much more difficult with images.
Furthermore in image processing there is a plethora of tools available
Thousands of algorithms available
Thousands of tools
Many images require multi-step processing
Experimenting is time-consuming
Science demands repeatability! and really wants reproducibility
Easy to follow the list, anyone with the right steps can execute and repeat (if not reproduce) the soup
Here it is harder to follow and you need to carefully keep track of what is being performed
library(igraph)
# a linear recipe: each step connects only to the next one
node.names<-c("Buy\nvegetables","Buy meat","Chop\nvegetables","Heat water","Wait for\nboiling","Wait 5\nadd meat")
c.mat<-matrix(0,length(node.names),length(node.names))
# chain adjacency: step i -> step i+1
for(i in c(1:(length(node.names)-1))) c.mat[i,i+1]<-1
colnames(c.mat)<-node.names
rownames(c.mat)<-node.names
g<-graph.adjacency(c.mat,mode="directed")
V(g)$degree <- degree(g)
V(g)$label <- V(g)$name
V(g)$color <- "lightblue"
V(g)$size<-80
E(g)$width<-2
E(g)$color<-"black"
plot(g,layout=layout.circle)#, layout= layout.kamada.kawai) ##
# extend the recipe with more steps, then rewire the chain so that the
# independent preparation paths (vegetables vs meat) are explicit
new.nodes<-c(node.names,"Mix carrots\npotatoes","add egg\nand fry","tenderize\nmeat",
"add tomatoes\ncook 10min","wait 5 minutes")
c.mat<-matrix(0,length(new.nodes),length(new.nodes))
# old connections
for(i in c(1:(length(new.nodes)-1))) c.mat[i,i+1]<-1
colnames(c.mat)<-new.nodes
rownames(c.mat)<-new.nodes
# break the simple chain and reroute the branches
c.mat[1,2]<-0
c.mat[1,3]<-1
c.mat[2,3]<-0
c.mat[2,9]<-1
c.mat[6,7]<-0
c.mat[6,11]<-1
# chop vegies
c.mat[3,]<-0
c.mat[3,7]<-1
c.mat[8,]<-0
c.mat[8,5]<-1
g<-graph.adjacency(c.mat,mode="directed")
V(g)$degree <- degree(g)
V(g)$label <- V(g)$name
V(g)$color <- "lightblue"
V(g)$size<-40
E(g)$width<-2
E(g)$color<-"black"
plot(g,layout=layout.circle)#, layout= layout.kamada.kawai) ##
Clearly a linear set of instructions is ill-suited for even a fairly easy soup, it is then even more difficult when there are dozens of steps and different pathways
# a layered (Sugiyama) layout makes the workflow ordering much clearer
plot(g,layout= layout.sugiyama(g)$layout)
Furthermore a clean workflow allows you to better parallelize the task since it is clear which tasks can be performed independently
# colour the nodes by workflow branch so the independently executable
# (parallelizable) paths stand out in the layered layout
V(g)[c(1,3,7,8)]$color <- "red"
V(g)[c(4)]$color <- "green"
V(g)[c(2,9,10)]$color <- "yellow"
plot(g,layout= layout.sugiyama(g)$layout)